# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 10_heterogeneity_analyses.R
### This script conducts heterogeneous effects analyses.
# ----
# Functions ----
# function to tabulate HTE regressions for each dv
tab_regression_hte <- function(varname, hte_covar, design) {
  fml <- as.formula(str_c(varname, "~treatment", "+", hte_covar, "+", "treatment", "*", hte_covar, "+ library_spillover_pre"))
  mod <- svyglm(fml, design = design)
  out <- tidy(mod) %>%
    mutate(dv = varname, n = nobs(mod), covariate = hte_covar, .before = 1) %>%
    mutate(interaction = case_when(term == paste0("treatmentMedia Literacy:", covariate) ~ TRUE,
                                   TRUE ~ FALSE))
  out
}

tab_regression_hte_teacher <- function(varname, hte_covar, design) {
  fml <- as.formula(str_c(varname, "~treatment", "+", hte_covar, "+", "treatment", "*", hte_covar, "+ district_name"))
  mod <- svyglm(fml, design = design)
  out <- tidy(mod) %>%
    mutate(dv = varname, n = nobs(mod), covariate = hte_covar, .before = 1) %>%
    mutate(interaction = case_when(term == paste0("treatmentMedia Literacy:", covariate) ~ TRUE,
                                   TRUE ~ FALSE))
  out
}

# Conduct main HTE analyses ----
## Class ----
regressions_hte_class <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "class", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_class <- left_join(dv_labs_endline_main, regressions_hte_class, by = "dv")

## Gender ----
regressions_hte_gender_num <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "gender_num", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_gender_num <- left_join(dv_labs_endline_main, regressions_hte_gender_num, by = "dv")

## Age ----
regressions_hte_age <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "age", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_age <- left_join(dv_labs_endline_main, regressions_hte_age, by = "dv")

## Scientific Beliefs ----
regressions_hte_non_scientific_health_beliefs_index <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "non_scientific_health_beliefs_index", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_non_scientific_health_beliefs_index <- left_join(dv_labs_endline_main, regressions_hte_non_scientific_health_beliefs_index, by = "dv")

## Mobile Internet ----
regressions_hte_mobile_internet_num <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "mobile_internet_num", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_mobile_internet_num <- left_join(dv_labs_endline_main, regressions_hte_mobile_internet_num, by = "dv")

## Asset Index ----
regressions_hte_asset_index <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "asset_index", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_asset_index <- left_join(dv_labs_endline_main, regressions_hte_asset_index, by = "dv")

## Media Exposure Index ----
regressions_hte_media_exposure_index <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "media_exposure_index", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_media_exposure_index <- left_join(dv_labs_endline_main, regressions_hte_media_exposure_index, by = "dv")

## Science Knowledge ----
regressions_hte_science <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "science", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_science <- left_join(dv_labs_endline_main, regressions_hte_science, by = "dv")

## National Election 2019 non-BJP vote ----
regressions_hte_nat_el_2019_non_bjp_num <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "nat_el_2019_non_bjp_num", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_nat_el_2019_non_bjp_num <- left_join(dv_labs_endline_main, regressions_hte_nat_el_2019_non_bjp_num, by = "dv")

## State Party ID 2020 non-BJP ----
regressions_hte_state_el_2020_non_bjp_num <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "state_el_2020_non_bjp_num", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_state_el_2020_non_bjp_num <- left_join(dv_labs_endline_main, regressions_hte_state_el_2020_non_bjp_num, by = "dv")

## Religion ----
regressions_hte_religion <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "religion_hindu_num", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_religion <- left_join(dv_labs_endline_main, regressions_hte_religion, by = "dv")

## Caste ----
regressions_hte_caste <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "caste_gen_num", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_caste <- left_join(dv_labs_endline_main, regressions_hte_caste, by = "dv")

## Spillover ----
regressions_hte_spillover <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "spillover_post", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_spillover <- left_join(dv_labs_endline_main, regressions_hte_spillover, by = "dv")

## Compliance ----
regressions_hte_compliance <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte(x, "compliance_minimal", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts
regressions_hte_compliance <- left_join(dv_labs_endline_main, regressions_hte_compliance, by = "dv")

## Survey Timing  ----
dv_labs_follow_main <- follow_up_dvs %>%
  filter(type == "idx")

regressions_hte_follow_days_from_first <- lapply(dv_labs_follow_main$dv, \(x) tab_regression_hte(x, "follow_days_from_first", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_follow_days_from_first <- left_join(dv_labs_follow_main, regressions_hte_follow_days_from_first, by = "dv")

## Summary plot ----
# Create summary df

regressions_hte_class$covariate_lab <- "Grade"
regressions_hte_gender_num$covariate_lab <- "Girl"
regressions_hte_non_scientific_health_beliefs_index$covariate_lab <- "Non-Scientific Beliefs"
regressions_hte_media_exposure_index$covariate_lab <- "Media Exposure"
regressions_hte_science$covariate_lab <- "Science Knowledge"
regressions_hte_state_el_2020_non_bjp_num$covariate_lab <- "State Election Non-BJP"
regressions_hte_spillover$covariate_lab <- "Spillover Stratum (low)"
regressions_hte_asset_index$covariate_lab <- "Asset Index"
regressions_hte_age$covariate_lab <- "Age"
regressions_hte_caste$covariate_lab <- "Gen. caste"
regressions_hte_religion$covariate_lab <- "Hindu"


regressions_hte_summary_df <- rbind(
  regressions_hte_gender_num,
  regressions_hte_age,
  regressions_hte_class,
  regressions_hte_asset_index,
  regressions_hte_non_scientific_health_beliefs_index,
  regressions_hte_media_exposure_index,
  regressions_hte_state_el_2020_non_bjp_num,
  regressions_hte_caste,
  regressions_hte_religion,
  regressions_hte_spillover)

regressions_hte_summary_df <- regressions_hte_summary_df %>%
  mutate(
    covariate_lab = factor(covariate_lab, 
                           levels = c(
                             "Age", "Grade", "Girl", "Spillover Stratum (low)", 
                             "State Election Non-BJP",
                             "Asset Index", "Media Exposure", "Non-Scientific Beliefs" , "Hindu" , "Gen. caste"
                            )))

color_palette <- c("#173750", "#4285f4", "#14A697", "#A5D5AF", "#B13D90", "#ffab40", "#8B008B", "darkred", "darkgreen", "yellow3" )

regressions_hte_summary_df %>%
  filter(str_detect(label, "Index")) %>%
  filter(is.na(subidx) & secondary_outcome == FALSE & mechanism == FALSE & interaction == TRUE) %>% # omit questions that are used in a subindex
  mutate(label = idx,
         label = as_factor(label),
         idx = as_factor(idx),
         significance = ifelse(p.value < 0.05, "YES", "NO")) %>%
ggplot(aes(x = estimate, y = as.numeric(covariate_lab), color = covariate_lab, linetype = significance)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point() +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0) +
  facet_grid(rows = "label", scales = "free_y", space = "free_y", switch = "y") + # facet by index
  xlab("Estimated Interaction Effects on Indices") +
  scale_y_discrete(position = "right") +
  scale_y_reverse() +  # Reverse the y-axis
  scale_color_manual(values = color_palette) +
  scale_linetype_manual(values=c("dotted", "solid")) +
  theme_bw() %+replace%
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        strip.text.y.left = element_text(angle = 0),
        legend.position = "right") + 
  labs(color = "Covariate", linetype = "Significance") +
  guides(linetype = "none")
ggsave("output/figures/hte_summary_plot.pdf", height = 8, width = 8)

## Summary table ----
regressions_hte_summary_df <- regressions_hte_summary_df %>%
  filter(type == "idx")

regressions_hte_summary_df <- regressions_hte_summary_df %>%
  filter( term == "treatmentMedia Literacy:gender_num" | term=="treatmentMedia Literacy:non_scientific_health_beliefs_index" | term=="treatmentMedia Literacy:asset_index" | term=="treatmentMedia Literacy:media_exposure_index" | term=="treatmentMedia Literacy:state_el_2020_non_bjp_num" | term=="treatmentMedia Literacy:spillover_post" | term=="treatmentMedia Literacy:age" | term=="treatmentMedia Literacy:class" | term=="treatmentMedia Literacy:religion_hindu_num" | term=="treatmentMedia Literacy:caste_gen_num")

regressions_hte_table <- regressions_hte_summary_df %>%
  select(label, covariate, term, estimate, std.error,p.value, n) %>%
  arrange(term) %>%
  rename(N = n)  # Rename `n` to `N`

regressions_hte_table <- regressions_hte_table %>%
  filter(label != "Civic Participation Index") %>%
  pivot_wider(
    names_from = label,
    values_from = c(estimate, std.error, p.value, N)
  )

regressions_hte_table <- regressions_hte_table %>%
  pivot_longer(
    cols = starts_with("estimate_") | starts_with("std.error_") | starts_with("p.value_") | starts_with("N_"),
    names_to = c("stat", "label"),
    names_sep = "_",
    values_to = "value",
    values_transform = list(value = as.character)  # Convert to character to handle stars and numeric together
  )

p_values_hte_summary <- regressions_hte_table %>%
  filter(stat == "p.value") %>%
  select(label, term, p.value = value)  # Rename `value` to `p.value` for clarity

regressions_hte_table <- regressions_hte_table %>%
  left_join(p_values_hte_summary, by = c("label", "term"))

regressions_hte_table <- regressions_hte_table %>%
  mutate(
    # Round and add significance stars based on `p.value`
    value = case_when(
      stat == "estimate" ~ paste0(sprintf("%.3f", as.numeric(value)),
                                case_when(
          as.numeric(p.value) < 0.001 ~ "***",
          as.numeric(p.value) < 0.01 ~ "**",
          as.numeric(p.value) < 0.05 ~ "*",
          TRUE ~ ""
        )
      ),
      stat == "std.error" ~ sprintf("%.3f", as.numeric(value)),
      stat == "p.value" ~ sprintf("%.3f", as.numeric(value)),
      stat == "N" ~ sprintf("%9.0f", as.numeric(value))
    )
  ) %>%
  select(-c("p.value", "covariate"))  %>%
  # Pivot to make `estimate`, `std.error`, and `p.value` columns wide
  pivot_wider(
    names_from = label,
    values_from = value
  ) %>%
  # Filter out rows where `stat` is `p.value`
  filter(stat != "p.value")  %>%
  # Correct way to drop the `p.value` column
  mutate(
    # Format std.error with parentheses and ensure estimate retains significance stars
    across(starts_with("Awareness Index"):starts_with("Engagement Behavior Index"),
           ~ ifelse(stat == "estimate" | stat== "N", ., paste0("(", ., ")"))
    )
  ) %>%
  # Arrange terms in specified order
  arrange(
    match(term, c(
      "treatmentMedia Literacy:age",  "treatmentMedia Literacy:class", "treatmentMedia Literacy:gender_num", "treatmentMedia Literacy:non_scientific_health_beliefs_index", "treatmentMedia Literacy:media_exposure_index", "treatmentMedia Literacy:asset_index",  "treatmentMedia Literacy:state_el_2020_non_bjp_num", "treatmentMedia Literacy:religion_hindu_num", "treatmentMedia Literacy:caste_gen_num"  , "treatmentMedia Literacy:spillover_post" )),
    match(stat, c("estimate", "std.error", "N"))
  ) %>%
  mutate(
  # Remove `term` value for rows where `stat` is "std.error" or "n"
  term = ifelse(stat == "std.error", "", term), 
  term = ifelse(stat == "N", "N", term)
  ) %>%
  select(-stat) %>%
  rename(Outcome = term) %>%
  mutate(Outcome = case_when(
    Outcome == "treatmentMedia Literacy:gender_num" ~ "Treatment x Girl",
    Outcome == "treatmentMedia Literacy:non_scientific_health_beliefs_index" ~ "Treatment x Non-scientific beliefs",    
    Outcome == "treatmentMedia Literacy:asset_index" ~ "Treatment x Asset Index",    
    Outcome == "treatmentMedia Literacy:media_exposure_index" ~ "Treatment x Prior media exposure",   
    Outcome == "treatmentMedia Literacy:state_el_2020_non_bjp_num" ~ "Treatment x 2020 State Election non-BJP",
    Outcome == "treatmentMedia Literacy:age" ~ "Treatment x Age",
    Outcome == "treatmentMedia Literacy:class" ~ "Treatment x Grade",
    Outcome == "treatmentMedia Literacy:religion_hindu_num" ~ "Treatment x Hindu",
    Outcome == "treatmentMedia Literacy:caste_gen_num" ~ "Treatment x Gen. caste",
    Outcome == "treatmentMedia Literacy:spillover_post" ~ "Treatment x Low spillover stratum",
    TRUE ~ Outcome  # Keep other values unchanged
  )) %>%
  rename_with(~ gsub("Index$", "", .), ends_with("Index"))

regressions_hte_table <- regressions_hte_table %>%
  relocate("Awareness ", .after = last_col())

regressions_hte_table <- regressions_hte_table %>%
  select(Outcome, everything())

# Ensure no row names in final_data
rownames(regressions_hte_table) <- NULL

# Remove existing column names from final_data to avoid default headers in LaTeX output
colnames(regressions_hte_table) <- NULL

# Generate LaTeX table using xtable
latex_table_hte <- xtable(regressions_hte_table, caption = "Heterogeneity results", label = "tab:hte_estimates_terms")

# Manually specify header rows for two-line headers
header_row1_hte <- c("Outcome", "Accuracy", "Sharing", "Health", "Source", "Engagement", "Engagement", "Awareness")
header_row2_hte <- c("", "Discernment", "Discernment", "Preferences", "Discernment", "Attitude", "Behavior", "")

# Create the LaTeX code for the two-line headers with a single \hline after "Behavior" and no extra lines
header_latex_hte <- paste(
  paste(header_row1_hte, collapse = " & "), "\\\\",
  paste(header_row2_hte, collapse = " & "), "\\\\"
)

# Calculate the number of columns and rows as needed
num_cols <- ncol(regressions_hte_table) - 1  # Adjust for the alignment
num_rows <- nrow(latex_table_hte)     # Ensure numeric row count

# Capture the LaTeX output as a character vector without row names
latex_output_hte <- capture.output(print(latex_table_hte, type = "latex", include.rownames = FALSE, 
      sanitize.text.function = identity, 
      align = c("l", "l", rep("c", num_cols)),  # Use calculated number of columns
      add.to.row = list(
        pos = list(0, num_rows),  # Use calculated row count
  command = c(
  header_latex_hte,
  "\\hline \n Library-Spillover FEs&  Yes &  Yes &  Yes &  Yes &  Yes &  Yes &  Yes \\\\ \\hline\n\\multicolumn{6}{l}{\\footnotesize *p$<$0.05; **p$<$0.01; ***p$<$0.001. Models include library-spillover strata FEs.} \\\\\n"))))


# Find the index of `\centering` and insert `\small` right after it
centering_index_hte <- which(grepl("\\centering", latex_output_hte))
latex_output_hte <- append(latex_output_hte, "\\resizebox{\\linewidth}{!}{%", after = centering_index_hte)

# Replace `\end{tabular}` with `%\n}`
latex_output_hte <- sub("\\\\end\\{tabular\\}", 
                        "\\\\end\\{tabular\\}%\n}", 
                        latex_output_hte)

# Write the captured output to the file
writeLines(latex_output_hte, "output/tables/tab_hte_results.tex")

# ----
# Conduct teacher HTE analyses ----
## Female teacher ----
regressions_hte_teacher_female <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte_teacher(x, "teacher_female", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_teacher_female <- left_join(dv_labs_endline_main, regressions_hte_teacher_female, by = "dv")

## Muslim Teacher ----
regressions_hte_teacher_muslim <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte_teacher(x, "teacher_muslim", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_teacher_muslim <- left_join(dv_labs_endline_main, regressions_hte_teacher_muslim, by = "dv")

## Female teacher ----
regressions_hte_teacher_general <- lapply(dv_labs_endline_main$dv, \(x) tab_regression_hte_teacher(x, "teacher_general", bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) # remove intercepts

regressions_hte_teacher_general <- left_join(dv_labs_endline_main, regressions_hte_teacher_general, by = "dv")

## Summary table ----
# Create summary table
regressions_hte_teacher_female$covariate_lab <- "Female teacher"
regressions_hte_teacher_muslim$covariate_lab <- "Muslim teacher"
regressions_hte_teacher_general$covariate_lab <- "General caste teacher"

regressions_hte_teacher_summary_df <- rbind(
  regressions_hte_teacher_female,
  regressions_hte_teacher_muslim,
  regressions_hte_teacher_general)

regressions_hte_teacher_summary_df <- regressions_hte_teacher_summary_df %>%
  mutate(
    covariate_lab = factor(covariate_lab, 
                           levels = c(
                             "Female teacher", "Muslim teacher", "General caste teacher")))

# Filter for indices only
regressions_hte_teacher_summary_df <- regressions_hte_teacher_summary_df %>%
  filter(type == "idx")

# Filter for interaction terms
regressions_hte_teacher_summary_df <- regressions_hte_teacher_summary_df %>%
  filter( term == "treatmentMedia Literacy:teacher_female" | term=="treatmentMedia Literacy:teacher_muslim" | term=="treatmentMedia Literacy:teacher_general")

regressions_hte_teacher_table <- regressions_hte_teacher_summary_df %>%
  select(label, covariate, term, estimate, std.error,p.value, n) %>%
  arrange(term) %>%
  rename(N = n)  # Rename `n` to `N`

regressions_hte_teacher_table <- regressions_hte_teacher_table %>%
  filter(label != "Civic Participation Index") %>%
  pivot_wider(
    names_from = label,
    values_from = c(estimate, std.error, p.value, N)
  )

regressions_hte_teacher_table <- regressions_hte_teacher_table %>%
  pivot_longer(
    cols = starts_with("estimate_") | starts_with("std.error_") | starts_with("p.value_") | starts_with("N_"),
    names_to = c("stat", "label"),
    names_sep = "_",
    values_to = "value",
    values_transform = list(value = as.character)  # Convert to character to handle stars and numeric together
  )


p_values_hte_teacher_summary <- regressions_hte_teacher_table %>%
  filter(stat == "p.value") %>%
  select(label, term, p.value = value)  # Rename `value` to `p.value` for clarity

regressions_hte_teacher_table <- regressions_hte_teacher_table %>%
  left_join(p_values_hte_teacher_summary, by = c("label", "term"))

regressions_hte_teacher_table <- regressions_hte_teacher_table %>%
  mutate(
    # Round and add significance stars based on `p.value`
    value = case_when(
      stat == "estimate" ~ paste0(sprintf("%.3f", as.numeric(value)),
                                  case_when(
                                    as.numeric(p.value) < 0.001 ~ "***",
                                    as.numeric(p.value) < 0.01 ~ "**",
                                    as.numeric(p.value) < 0.05 ~ "*",
                                    TRUE ~ ""
                                  )
      ),
      stat == "std.error" ~ sprintf("%.3f", as.numeric(value)),
      stat == "p.value" ~ sprintf("%.3f", as.numeric(value)),
      stat == "N" ~ sprintf("%9.0f", as.numeric(value))
    )
  ) %>%
  select(-c("p.value", "covariate"))  %>%
  # Pivot to make `estimate`, `std.error`, and `p.value` columns wide
  pivot_wider(
    names_from = label,
    values_from = value
  ) %>%
  # Filter out rows where `stat` is `p.value`
  filter(stat != "p.value")  %>%
  # Correct way to drop the `p.value` column
  mutate(
    # Format std.error with parentheses and ensure estimate retains significance stars
    across(starts_with("Awareness Index"):starts_with("Engagement Behavior Index"),
           ~ ifelse(stat == "estimate" | stat== "N", ., paste0("(", ., ")"))
    )
  ) %>%
  # Arrange terms in specified order
  arrange(
    match(term, c(
      "treatmentMedia Literacy:teacher_female",  "treatmentMedia Literacy:teacher_muslim", "treatmentMedia Literacy:teacher_general")),
    match(stat, c("estimate", "std.error", "N"))
  ) %>%
  mutate(
    # Remove `term` value for rows where `stat` is "std.error" or "n"
    term = ifelse(stat == "std.error", "", term), 
    term = ifelse(stat == "N", "N", term)
  ) %>%
  select(-stat) %>%
  rename(Outcome = term) %>%
  mutate(Outcome = case_when(
    Outcome == "treatmentMedia Literacy:teacher_female" ~ "Treatment x Female teacher",
    Outcome == "treatmentMedia Literacy:teacher_muslim" ~ "Treatment x Muslim teacher",    
    Outcome == "treatmentMedia Literacy:teacher_general" ~ "Treatment x General caste teacher",
    TRUE ~ Outcome  # Keep other values unchanged
  )) %>%
  rename_with(~ gsub("Index$", "", .), ends_with("Index"))

regressions_hte_teacher_table <- regressions_hte_teacher_table %>%
  relocate("Awareness ", .after = last_col())

regressions_hte_teacher_table <- regressions_hte_teacher_table %>%
  select(Outcome, everything())

# Ensure no row names in final_data
rownames(regressions_hte_teacher_table) <- NULL

# Remove existing column names from final_data to avoid default headers in LaTeX output
colnames(regressions_hte_teacher_table) <- NULL

# Generate LaTeX table using xtable
latex_table_hte_teacher <- xtable(regressions_hte_teacher_table, caption = "Heterogeneity results by teacher characteristics", label = "tab:hte_teacher_estimates_terms")

# Manually specify header rows for two-line headers
header_row1_hte_teacher <- c("Outcome", "Accuracy", "Sharing", "Health", "Source", "Engagement", "Engagement", "Awareness")
header_row2_hte_teacher <- c("", "Discernment", "Discernment", "Preferences", "Discernment", "Attitude", "Behavior", "")

# Create the LaTeX code for the two-line headers with a single \hline after "Behavior" and no extra lines
header_latex_hte_teacher <- paste(
  paste(header_row1_hte_teacher, collapse = " & "), "\\\\",
  paste(header_row2_hte_teacher, collapse = " & "), "\\\\"
)

# Calculate the number of columns and rows as needed
num_cols_teacher <- ncol(regressions_hte_teacher_table) - 1  # Adjust for the alignment
num_rows_teacher <- nrow(latex_table_hte_teacher)     # Ensure numeric row count

# Capture the LaTeX output as a character vector without row names
latex_output_hte_teacher <- 
  capture.output(print(latex_table_hte_teacher, type = "latex", include.rownames = FALSE, 
                       sanitize.text.function = identity, 
                       align = c("l", "l", rep("c", num_cols_teacher)),  # Use calculated number of columns
                       add.to.row = list(
                         pos = list(0, num_rows_teacher),  # Use calculated row count
                         command = c(
                           header_latex_hte_teacher,
                           "\\hline \n District FEs&  Yes &  Yes &  Yes &  Yes &  Yes &  Yes &  Yes \\\\ \\hline\n\\multicolumn{6}{l}{\\footnotesize *p$<$0.05; **p$<$0.01; ***p$<$0.001. Models include district FEs.} \\\\\n"))))


# Find the index of `\centering` and insert `\small` right after it
centering_index_hte_teacher <- which(grepl("\\centering", latex_output_hte_teacher))
latex_output_hte_teacher <- append(latex_output_hte_teacher, "\\resizebox{\\linewidth}{!}{%", after = centering_index_hte_teacher)

# Replace `\end{tabular}` with `%\n}`
latex_output_hte_teacher <- sub("\\\\end\\{tabular\\}", 
                                "\\\\end\\{tabular\\}%\n}", 
                                latex_output_hte_teacher)

# Write the captured output to the file
writeLines(latex_output_hte_teacher, "output/tables/tab_hte_teacher_results.tex")


# END of 10_hetereogeneity_analyses.R ----
